* First positional argument: root directory of the project.
global root_dir = "`1'"

* Project configuration. Presumably defines the directory globals used in this
* file (log_dir, commondata_dir, dataset_dir) - TODO confirm against config.do.
include "$root_dir/code/config/config.do"

* Close a log named dat that an interrupted earlier run may have left open;
* otherwise the log command below fails because the log name is still in use.
cap log close dat
* The path is quoted so that a log directory containing spaces still works.
cap noi log using "${log_dir}/BvD_groups.log", replace name(dat)

* Handle empty arguments: positional arguments 2-5 are optional, and the
* placeholder ___EMPTY___ is translated into an empty string.
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

* A non-empty argument overrides the corresponding global; an empty argument
* leaves whatever value that global already had untouched.
if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
    global wtype "$arg4"
}
* Echo the weight type in effect, whether or not it was overridden above.
di "${wtype}"
capture noi {

    * This do-file lists groups of firms based on their number of patents.
    * It is only used for table A 4a, but kept separate from it because the
    * Orbis files cannot be included.
    * Output: one row per Orbis firm with its group (group_old), the group
    * member holding the most patents (group_new) and max_pats_share.
    * All file names are quoted so that directories with spaces work.

    * Get the correct aggregation of firms (subentities etc): the map links a
    * firm ID (bvdid_source) to the ID of the entity it belongs to (bvdid_target).
    import delimited using "${commondata_dir}/orbis_patents/firm_merge_map.csv", varnames(1) clear
    rename bvdid_source BvDIDnumber
    tempfile mergemap
    save "`mergemap'"

    * Create list of groups.
    * First collect the number of patents per entity: keep only applications
    * that also appear in pats_tfa (machinery patents according to the original
    * author - not verifiable from this file) and were filed in 1970-1994.
    use "${commondata_dir}/orbis_patents/Orbis_patents_list_2017.dta", clear
    mmerge appln_id using "${dataset_dir}/patent_list/pats_tfa.dta", unmatched(none)
    keep BvDIDnumber appln_year
    keep if appln_year >= 1970 & appln_year <= 1994
    collapse (count) appln_year, by(BvDIDnumber)
    rename appln_year num_patents
    tempfile numpats
    save "`numpats'"

    * Get all firms in Orbis and attach patent counts and the merge map.
    * unmatched(master) keeps every Orbis firm, matched or not.
    use "${commondata_dir}/orbis_patents/Orbis_patents_list_2017.dta", clear
    keep BvDIDnumber
    duplicates drop
    mmerge BvDIDnumber using "`numpats'", unmatched(master)
    mmerge BvDIDnumber using "`mergemap'", unmatched(master)

    * A firm found in the merge map belongs to the group of its target entity;
    * any other firm forms its own group. _merge here stems from the second
    * merge, so the value 3 means the firm was found in the merge map.
    gen group = BvDIDnumber
    replace group = bvdid_target if _merge == 3
    drop _merge bvdid_target
    * Firms without any counted patent come out of the merge with a missing count.
    replace num_patents = 0 if num_patents == .

    * Find the largest firm in each group. Random noise below 0.1 breaks ties
    * between members with equal patent counts without reordering different counts.
    * NOTE(review): no seed is set in this file, so the tie-break is only
    * reproducible if a seed is set upstream - confirm whether config.do does so.
    gen num_patents_r = num_patents + runiform()/10
    bys group : egen maxpats = max(num_patents_r)
    bys group : egen totpats = total(num_patents)
    * Share of the group's patents held by its largest holder; missing for
    * groups whose members have no patents at all (0 divided by 0).
    gen pats_share = num_patents / totpats
    bys group : egen max_pats_share = max(pats_share)
    drop pats_share totpats
    gen BvD_max = BvDIDnumber if num_patents_r == maxpats

    * Spread the ID of the largest member to every row of its group: after
    * sorting, the last row of each group carries the non-empty ID.
    * NOTE(review): relies on BvDIDnumber being a string variable, so that the
    * empty value sorts first - confirm.
    bys group (BvD_max) : replace BvD_max = BvD_max[_N]
    drop num_patents num_patents_r maxpats

    rename group group_old
    rename BvD_max group_new

    compress
    save "${dataset_dir}/patstat_orbis/BvD_largest_group_remap.dta", replace

}
* Report whether the captured block above ran through. The return code is
* stored right away because any later capture would overwrite _rc.
local run_rc = _rc
if `run_rc' != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the log opened at the top of this file; capture keeps this silent
* if that log is not open.
capture log close dat